// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/trace_event/heap_profiler_heap_dump_writer.h"

#include <stdint.h>

#include <algorithm>
#include <iterator>
#include <tuple>
#include <utility>
#include <vector>

#include "base/format_macros.h"
#include "base/logging.h"
#include "base/macros.h"
#include "base/strings/stringprintf.h"
#include "base/trace_event/heap_profiler_stack_frame_deduplicator.h"
#include "base/trace_event/heap_profiler_type_name_deduplicator.h"
#include "base/trace_event/memory_dump_session_state.h"
#include "base/trace_event/trace_config.h"
#include "base/trace_event/trace_event_argument.h"
#include "base/trace_event/trace_log.h"

// Most of what the |HeapDumpWriter| does is aggregating detailed information
// about the heap and deciding what to dump. The input to this process is a
// list of (|AllocationContext|, size) pairs.
//
// The pairs are grouped into |Bucket|s. A bucket is a group of (context, size)
// pairs where the properties of the contexts share a prefix. (The type name is
// treated as a list of length one here, so it too can be part of the prefix.)
// First all pairs are put into one bucket that represents the entire heap.
// Then this bucket is recursively broken down into smaller buckets. Each
// bucket keeps track of whether further breakdown is possible.
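//
// For illustration, suppose the input contains these pairs (the frame names
// and sizes are made up):
//
//   ({ frames: [main, CreateWidget] }, 16)
//   ({ frames: [main, PaintWidget] },  16)
//   ({ frames: [main, LayoutFrame] },  32)
//
// The root bucket (size 64) is first broken down by the frame at cursor 0,
// which is |main| for all three contexts, giving a single subbucket of size 64
// with its cursor advanced to 1. Breaking that subbucket down by the frame at
// cursor 1 yields a bucket of size 16 for |CreateWidget|, one of size 16 for
// |PaintWidget|, and one of size 32 for |LayoutFrame|.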

namespace base {
namespace trace_event {
    namespace internal {
        namespace {

            // Denotes a property of |AllocationContext| to break down by.
            enum class BreakDownMode { kByBacktrace, kByTypeName };

            // A group of bytes for which the context shares a prefix.
            struct Bucket {
                Bucket()
                    : size(0)
                    , count(0)
                    , backtrace_cursor(0)
                    , is_broken_down_by_type_name(false)
                {
                }

                std::vector<std::pair<const AllocationContext*, AllocationMetrics>>
                    metrics_by_context;

                // The sum of the sizes of |metrics_by_context|.
                size_t size;

                // The sum of the allocation counts of |metrics_by_context|.
                size_t count;

                // The index of the first stack frame that has not yet been broken down
                // by. For all elements in this bucket, the stack frames 0 up to (but
                // not including) the cursor must be equal.
                size_t backtrace_cursor;

                // When true, the type name for all elements in this bucket must be equal.
                bool is_broken_down_by_type_name;
            };

            // Comparison operator to order buckets by their size.
            bool operator<(const Bucket& lhs, const Bucket& rhs)
            {
                return lhs.size < rhs.size;
            }

            // Groups the allocations in the bucket by |break_by|. The buckets in the
            // returned list will have |backtrace_cursor| advanced or
            // |is_broken_down_by_type_name| set depending on the property to group by.
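            //
            // For example (with made-up frames): a bucket with cursor 1 whose
            // contexts have backtraces [main, A] and [main, B] is grouped by the
            // frame at index 1, producing one subbucket per distinct frame, each
            // with its cursor advanced to 2.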
            std::vector<Bucket> GetSubbuckets(const Bucket& bucket,
                BreakDownMode break_by)
            {
                base::hash_map<const void*, Bucket> breakdown;

                if (break_by == BreakDownMode::kByBacktrace) {
                    for (const auto& context_and_metrics : bucket.metrics_by_context) {
                        const Backtrace& backtrace = context_and_metrics.first->backtrace;
                        const StackFrame* begin = std::begin(backtrace.frames);
                        const StackFrame* end = begin + backtrace.frame_count;
                        const StackFrame* cursor = begin + bucket.backtrace_cursor;

                        DCHECK_LE(cursor, end);

                        if (cursor != end) {
                            Bucket& subbucket = breakdown[cursor->value];
                            subbucket.size += context_and_metrics.second.size;
                            subbucket.count += context_and_metrics.second.count;
                            subbucket.metrics_by_context.push_back(context_and_metrics);
                            subbucket.backtrace_cursor = bucket.backtrace_cursor + 1;
                            subbucket.is_broken_down_by_type_name = bucket.is_broken_down_by_type_name;
                            DCHECK_GT(subbucket.size, 0u);
                            DCHECK_GT(subbucket.count, 0u);
                        }
                    }
                } else if (break_by == BreakDownMode::kByTypeName) {
                    if (!bucket.is_broken_down_by_type_name) {
                        for (const auto& context_and_metrics : bucket.metrics_by_context) {
                            const AllocationContext* context = context_and_metrics.first;
                            Bucket& subbucket = breakdown[context->type_name];
                            subbucket.size += context_and_metrics.second.size;
                            subbucket.count += context_and_metrics.second.count;
                            subbucket.metrics_by_context.push_back(context_and_metrics);
                            subbucket.backtrace_cursor = bucket.backtrace_cursor;
                            subbucket.is_broken_down_by_type_name = true;
                            DCHECK_GT(subbucket.size, 0u);
                            DCHECK_GT(subbucket.count, 0u);
                        }
                    }
                }

                std::vector<Bucket> buckets;
                buckets.reserve(breakdown.size());
                for (const auto& key_bucket : breakdown)
                    buckets.push_back(key_bucket.second);

                return buckets;
            }

            // Breaks down the bucket by |break_by|. Returns only buckets that
            // contribute at least |min_size_bytes| to the total size; the long tail
            // of smaller buckets is omitted.
            std::vector<Bucket> BreakDownBy(const Bucket& bucket,
                BreakDownMode break_by,
                size_t min_size_bytes)
            {
                std::vector<Bucket> buckets = GetSubbuckets(bucket, break_by);

                // Ensure that |buckets| is a max-heap (the data structure, not memory heap),
                // so its front contains the largest bucket. Buckets should be iterated
                // ordered by size, but sorting the vector is overkill because the long tail
                // of small buckets will be discarded. By using a max-heap, the optimal case
                // where all but the first bucket are discarded is O(n). The worst case where
                // no bucket is discarded is doing a heap sort, which is O(n log n).
                std::make_heap(buckets.begin(), buckets.end());

                // Keep including buckets as long as the largest remaining bucket
                // contributes at least |min_size_bytes|. The large buckets end up in
                // [it, end()); [begin(), it) is the part that still contains the
                // max-heap of small buckets.
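                //
                // For example, with |min_size_bytes| = 50 and the bucket sizes from
                // the diagram below, the first three iterations pop 97, 89 and 83
                // (largest first) to the back of the vector; the loop then stops
                // because the largest remaining bucket (19) is below the threshold.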
                std::vector<Bucket>::iterator it;
                for (it = buckets.end(); it != buckets.begin(); --it) {
                    if (buckets.front().size < min_size_bytes)
                        break;

                    // Put the largest bucket in [begin, it) at |it - 1| and max-heapify
                    // [begin, it - 1). This puts the next largest bucket at |buckets.front()|.
                    std::pop_heap(buckets.begin(), it);
                }

                // At this point, |buckets| looks like this (numbers are bucket sizes):
                //
                // <-- max-heap of small buckets --->
                //                                  <-- large buckets by ascending size -->
                // [ 19 | 11 | 13 | 7 | 2 | 5 | ... | 83 | 89 | 97 ]
                //   ^                                ^              ^
                //   |                                |              |
                //   begin()                          it             end()

                // Discard the long tail of buckets that contribute less than
                // |min_size_bytes| each.
                buckets.erase(buckets.begin(), it);

                return buckets;
            }

        } // namespace

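        // Orders |Entry|s lexicographically by (stack_frame_id, type_id). For
        // example, an entry with IDs (1, 9) orders before one with IDs (2, 3),
        // because the type ID is only compared when the stack frame IDs are equal.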
        bool operator<(Entry lhs, Entry rhs)
        {
            // There is no need to compare |size|. If the backtrace and type name are
            // equal then the sizes must be equal as well.
            return std::tie(lhs.stack_frame_id, lhs.type_id) < std::tie(rhs.stack_frame_id, rhs.type_id);
        }

        HeapDumpWriter::HeapDumpWriter(StackFrameDeduplicator* stack_frame_deduplicator,
            TypeNameDeduplicator* type_name_deduplicator,
            uint32_t breakdown_threshold_bytes)
            : stack_frame_deduplicator_(stack_frame_deduplicator)
            , type_name_deduplicator_(type_name_deduplicator)
            , breakdown_threshold_bytes_(breakdown_threshold_bytes)
        {
        }

        HeapDumpWriter::~HeapDumpWriter() { }

        bool HeapDumpWriter::AddEntryForBucket(const Bucket& bucket)
        {
            // The contexts in the bucket are all different, but the backtrace frames
            // in the [begin, cursor) range are equal for all contexts in the bucket,
            // and the type names are all equal if |is_broken_down_by_type_name| is
            // set.
            DCHECK(!bucket.metrics_by_context.empty());

            const AllocationContext* context = bucket.metrics_by_context.front().first;

            const StackFrame* backtrace_begin = std::begin(context->backtrace.frames);
            const StackFrame* backtrace_end = backtrace_begin + bucket.backtrace_cursor;
            DCHECK_LE(bucket.backtrace_cursor, arraysize(context->backtrace.frames));

            Entry entry;
            entry.stack_frame_id = stack_frame_deduplicator_->Insert(
                backtrace_begin, backtrace_end);

            // Deduplicate the type name, or use ID -1 if the bucket has not been
            // broken down by type name.
            entry.type_id = bucket.is_broken_down_by_type_name
                ? type_name_deduplicator_->Insert(context->type_name)
                : -1;

            entry.size = bucket.size;
            entry.count = bucket.count;

            auto position_and_inserted = entries_.insert(entry);
            return position_and_inserted.second;
        }

        void HeapDumpWriter::BreakDown(const Bucket& bucket)
        {
            auto by_backtrace = BreakDownBy(bucket,
                BreakDownMode::kByBacktrace,
                breakdown_threshold_bytes_);
            auto by_type_name = BreakDownBy(bucket,
                BreakDownMode::kByTypeName,
                breakdown_threshold_bytes_);

            // Insert entries for the buckets. If a bucket was not present before, it has
            // not been broken down before, so recursively continue breaking down in that
            // case. There might be multiple routes to the same entry (first break down
            // by type name, then by backtrace, or first by backtrace and then by type),
            // so a set is used to avoid dumping and breaking down entries more than once.
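            //
            // For example (hypothetical buckets): the entry for backtrace [A, B]
            // with type T can be reached by first breaking down by backtrace to
            // [A, B] and then by type name T, or by first breaking down by type
            // name T and then by backtrace. The second time |AddEntryForBucket|
            // sees that entry it returns false, which stops the recursion there.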

            for (const Bucket& subbucket : by_backtrace)
                if (AddEntryForBucket(subbucket))
                    BreakDown(subbucket);

            for (const Bucket& subbucket : by_type_name)
                if (AddEntryForBucket(subbucket))
                    BreakDown(subbucket);
        }

        const std::set<Entry>& HeapDumpWriter::Summarize(
            const hash_map<AllocationContext, AllocationMetrics>& metrics_by_context)
        {
            // Start with one bucket that represents the entire heap. Iterate by
            // reference, because the buckets store pointers to the allocation
            // contexts inside |metrics_by_context|; iterating by value would leave
            // those pointers dangling.
            Bucket root_bucket;
            for (const auto& context_and_metrics : metrics_by_context) {
                DCHECK_GT(context_and_metrics.second.size, 0u);
                DCHECK_GT(context_and_metrics.second.count, 0u);
                const AllocationContext* context = &context_and_metrics.first;
                root_bucket.metrics_by_context.push_back(
                    std::make_pair(context, context_and_metrics.second));
                root_bucket.size += context_and_metrics.second.size;
                root_bucket.count += context_and_metrics.second.count;
            }

            AddEntryForBucket(root_bucket);

            // Recursively break down the heap and fill |entries_| with entries to dump.
            BreakDown(root_bucket);

            return entries_;
        }

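        // Writes |entries| to a |TracedValue|. For illustration, an entry with
        // size 32, count 2, leaf stack frame ID 3 and type ID 5 is serialized as
        // {"size": "20", "count": "2", "bt": "3", "type": "5"}; the size and
        // count are formatted as hexadecimal, the IDs as decimal.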
        std::unique_ptr<TracedValue> Serialize(const std::set<Entry>& entries)
        {
            std::string buffer;
            std::unique_ptr<TracedValue> traced_value(new TracedValue);

            traced_value->BeginArray("entries");

            for (const Entry& entry : entries) {
                traced_value->BeginDictionary();

                // Format the size and count as hexadecimal strings into |buffer|.
                SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.size));
                traced_value->SetString("size", buffer);

                SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.count));
                traced_value->SetString("count", buffer);

                if (entry.stack_frame_id == -1) {
                    // An empty backtrace (which will have ID -1) is represented by the empty
                    // string, because there is no leaf frame to reference in |stackFrames|.
                    traced_value->SetString("bt", "");
                } else {
                    // Format index of the leaf frame as a string, because |stackFrames| is a
                    // dictionary, not an array.
                    SStringPrintf(&buffer, "%i", entry.stack_frame_id);
                    traced_value->SetString("bt", buffer);
                }

                // Type ID -1 (cumulative size for all types) is represented by the absence
                // of the "type" key in the dictionary.
                if (entry.type_id != -1) {
                    // Format the type ID as a string.
                    SStringPrintf(&buffer, "%i", entry.type_id);
                    traced_value->SetString("type", buffer);
                }

                traced_value->EndDictionary();
            }

            traced_value->EndArray(); // "entries"
            return traced_value;
        }

    } // namespace internal

    std::unique_ptr<TracedValue> ExportHeapDump(
        const hash_map<AllocationContext, AllocationMetrics>& metrics_by_context,
        const MemoryDumpSessionState& session_state)
    {
        internal::HeapDumpWriter writer(
            session_state.stack_frame_deduplicator(),
            session_state.type_name_deduplicator(),
            session_state.memory_dump_config().heap_profiler_options.breakdown_threshold_bytes);
        return Serialize(writer.Summarize(metrics_by_context));
    }

} // namespace trace_event
} // namespace base
